“Files Used:”
Z:/COVID-19_WastewaterAnalysis/data/processed/MMSD_Interceptor_Cases_2_7_22.csv
Z:/COVID-19_WastewaterAnalysis/data/processed/LIMSWasteData_02-09-22.csv
MadDF <- filter(FullDF,Site=="Madison")%>%
mutate(FlagedOutliers = IdentifyOutliers(N1,Bin = 21, Action = "Flag"),
#Manual flagging that method misses due to boundary effect with binning
FlagedOutliers = ifelse(Date == mdy("01/26/2021") | Date == mdy("01/27/2021"),
TRUE, FlagedOutliers),
NoOutlierVar = ifelse(FlagedOutliers, NA, N1))
OutlierDF <- MadDF%>%
filter(FlagedOutliers)
FullDFMassRatio <- FullDF%>%
filter(!is.na(N1))%>%
mutate(SC2_mass = (3.785*1e6)*FlowRate*N1)
MissingIntercepter <- FullDFMassRatio%>%
filter(Site!="Madison",!is.na(FlowRate)|!is.na(N1))%>%
group_by(Date)%>%
summarize(N = n())%>%
filter(N!=5)
FullDFMassRatio.Mad <- FullDFMassRatio%>%
filter(!is.na(SC2_mass))%>%
filter(Site=="Madison")
FullDFMassRatio.Inter <- FullDFMassRatio%>%
filter(!is.na(SC2_mass))%>%
filter(Site!="Madison")#TempErrorMetric
TempErrorMetric <- FullDFMassRatio.Inter%>%
group_by(Date)%>%
summarise(InterSum = sum(SC2_mass),InterSumFlow = sum(FlowRate))%>%
inner_join(FullDFMassRatio.Mad)%>%
mutate(MassRatio = SC2_mass/InterSum,
ErrorRatio = 2*(SC2_mass-InterSum)/(SC2_mass+InterSum),
MassRatioFlow = FlowRate/InterSumFlow,
ErrorRatioFlow = 2*(FlowRate-InterSumFlow)/(FlowRate+InterSumFlow))
FullDFMassRatio.Mad <- FullDFMassRatio%>%
filter(!is.na(SC2_mass))%>%
filter(Site=="Madison")%>%
inner_join(FullDFMassRatio.Inter["Date"])%>%
unique()
FullDFMassRatio.Inter <- FullDFMassRatio%>%
filter(Site!="Madison")
KeyOulierPoints <- c(ymd("2021-06-08"),
ymd("2021-10-17"),
ymd("2021-05-02"),
ymd("2021-01-26"))
NonOuliersDF <- MadDF%>%
mutate(Outlier = ifelse(FlagedOutliers,N1,NA))%>%
mutate(N1 = NoOutlierVar)%>%
filter(!(is.na(N1)))
OutLierPlotDF <- MadDF%>%
mutate(Outlier = ifelse(FlagedOutliers,N1,NA))%>%
mutate(N1 = NoOutlierVar)%>%
filter(!(is.na(N1)&is.na(Outlier)))%>%
ggplot(aes(x=Date))+#Data depends on time
geom_line(aes(y=N1,
color="N1",
info = N1),data=NonOuliersDF)+#compares N1
geom_point(aes(y=Outlier,
color="N1 Outlier",
info = Outlier))+
theme_light() +
scale_color_manual(values=c("#F8766D","#999999"))
ggplotly(OutLierPlotDF)
#"2021-06-08","2021-10-17","2021-05-02","2021-01-26"
Sum of interceptor flows agrees with composite flow except for dates with missing flow information for some interceptors. Purple boxes beneath data are to communicate the Dates with interceptor data partially missing.
MissingFlowData <- FullDFMassRatio.Inter%>%
group_by(Date)%>%
summarise(n=n())%>%
filter(n!=5)%>%
mutate(MissingSite=TRUE)
FlowPlot <- FullDFMassRatio.Inter%>%
full_join(MissingFlowData)%>%
mutate(MissingSite=ifelse(is.na(MissingSite),FALSE,MissingSite))%>%
filter(!is.na(FlowRate))%>%
ggplot()+
geom_col(aes(x=Date,y=FlowRate,fill=Site,shape=MissingSite),position="stack", stat="identity", width = 3) +
geom_col(aes(x=Date,y=-.5),data=MissingFlowData,color="Purple",fill="Purple", width = 3)+
scale_fill_brewer(palette="Spectral") +
scale_color_manual(values=c("#d60000", "#ffa600", "#7ed05c", "#5c9c43", "#3a56d6")) +
theme_light() +
geom_point(data=FullDFMassRatio.Mad,aes(x=Date,y=FlowRate),size=2)+
ylab("Average daily flow (MGD)") +
geom_hline(yintercept=median(FullDFMassRatio.Mad$FlowRate), linetype = "dashed", color="black") +
ggtitle("Average daily flow (MMSD)")
ggplotly(FlowPlot)%>%
add_annotations( text="Site, Full Data For Day", xref="paper", yref="paper",
x=1.02, xanchor="left",
y=0.8, yanchor="bottom", # Same y as legend below
legendtitle=TRUE, showarrow=FALSE ) %>%
layout( legend=list(y=0.8, yanchor="top" ) )
MassBalencePlot <- FullDFMassRatio.Inter%>%
full_join(MissingFlowData)%>%
mutate(MissingSite=ifelse(is.na(MissingSite),FALSE,MissingSite))%>%
#filter(!is.na(FlowRate))%>%
#filter(!MissingSite)%>%
ggplot()+
geom_col(aes(x=Date,y=SC2_mass,fill=Site,shape=MissingSite),position="stack", stat="identity", width = 3) +
geom_col(aes(x=Date,y=-3e12),data=MissingFlowData,color="Purple",fill="Purple", width = 3)+
scale_fill_brewer(palette="Spectral") +
scale_color_manual(values=c("#d60000", "#ffa600", "#7ed05c", "#5c9c43", "#3a56d6")) +
theme_light() +
geom_point(data=FullDFMassRatio.Mad,aes(x=Date,y=SC2_mass),size=1)+
ylab("N1 gene copies per day") +
ggtitle("SARS-CoV-2, 24h Mass Loading (Madison)")
ggplotly(MassBalencePlot)
N_One_Outliers <- c(ymd("2021-04-15"), ymd("2021-04-26"), ymd("2021-07-26"), ymd("2021-12-13"))
N_TWO_Outliers <- c(ymd("2021-04-15"), ymd("2021-05-20"))
FullDFMassRatio.Inter%>%
group_by(Date)%>%
summarise(SC2_InterSum = sum(SC2_mass,na.rm = TRUE),
nIntercepter = n())%>%
inner_join(FullDFMassRatio.Mad)%>%
mutate(Ratio = (SC2_InterSum/SC2_mass)*(5/nIntercepter))%>%
arrange(Ratio)%>%
select(Ratio,nIntercepter, everything())#%>%
## # A tibble: 104 x 15
## Ratio nIntercepter Date SC2_InterSum Site FirstConfirmed
## <dbl> <int> <date> <dbl> <chr> <int>
## 1 0.159 3 2021-04-15 8.02e12 Madison 56
## 2 0.331 5 2021-07-26 8.16e12 Madison 24
## 3 0.340 5 2021-12-13 6.91e13 Madison 144
## 4 0.350 5 2021-05-31 6.25e12 Madison 3
## 5 0.408 5 2021-03-22 9.09e12 Madison NA
## 6 0.416 5 2021-08-19 1.27e13 Madison 94
## 7 0.471 5 2021-02-11 2.55e13 Madison 71
## 8 0.490 4 2021-06-21 4.56e12 Madison NA
## 9 0.502 5 2021-09-20 3.28e13 Madison 179
## 10 0.569 5 2021-08-23 2.43e13 Madison 53
## # ... with 94 more rows, and 9 more variables: SevenDayMACases <dbl>, N1 <dbl>,
## # BCoV <dbl>, PMMoV <dbl>, GeoMeanN12 <dbl>, FlowRate <dbl>,
## # temperature <dbl>, equiv_sewage_amt <dbl>, SC2_mass <dbl>
#summarise(mean(Ratio))
LowRatioN_One <- c("2021-04-15","2021-07-26","2021-12-13","2021-05-31","2021-03-22")
LowRatioN_TWO <- c("2021-04-15","2021-07-26","2021-12-13","2021-10-04","2021-03-22")
InterceptRatioPlot <- FullDFMassRatio.Inter%>%
group_by(Date)%>%
summarise(SC2_InterSum = sum(SC2_mass,na.rm = TRUE),
nIntercepter = n())%>%
inner_join(FullDFMassRatio)%>%
mutate(Ratio = (SC2_mass/SC2_InterSum)*(nIntercepter/5))%>%
arrange(Ratio)%>%
select(Ratio,nIntercepter, everything())%>%
ggplot(aes(x=Date))+
geom_line(aes(y= Ratio,color = Site))
ggplotly(InterceptRatioPlot)
IntercepterOverLay <- FullDFMassRatio.Inter%>%
complete(Date,Site)%>%
full_join(FullDFMassRatio)%>%
ggplot(aes(x=Date))+
geom_line(aes(y=N1,color = Site))+
scale_y_log10()
ggplotly(IntercepterOverLay)
N1BalencePlot <- FullDFMassRatio.Inter%>%
complete(Date,Site)%>%
ggplot()+
geom_col(aes(x=Date,y=N1,fill=Site),
position = "dodge", stat="identity", width = 2) +
geom_col(aes(x=Date,y=-3e3),
data=MissingFlowData,color="Purple",fill="Purple", width = 2)+
scale_fill_brewer(palette="Spectral") +
scale_color_manual(values=c("#d60000", "#ffa600", "#7ed05c", "#5c9c43", "#3a56d6")) +
theme_light() +
geom_point(data=FullDFMassRatio.Mad,aes(x=Date,y=N1),size=1)+
scale_y_log10()+
ylab("N1 gene copies per day") +
ggtitle("SARS-CoV-2, 24h Mass Loading (Madison)")
ggplotly(N1BalencePlot)
FullDFMassRatio.Inter%>%
group_by(Site)%>%
summarise(N1 = mean(N1,na.rm=TRUE),
Flow = mean(FlowRate,na.rm=TRUE),
SC2 = mean(SC2_mass,na.rm=TRUE),
AntiSC2 = mean(N1/FlowRate,na.rm=TRUE))#%>%
## # A tibble: 5 x 5
## Site N1 Flow SC2 AntiSC2
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 MMSD-P11 294443. 8.58 9.04e12 34459.
## 2 MMSD-P18 403384. 11.2 1.69e13 36331.
## 3 MMSD-P2 313275. 5.85 6.57e12 53950.
## 4 MMSD-P7 356002. 4.40 5.86e12 87100.
## 5 MMSD-P8 383066. 6.27 8.66e12 61481.
#ungroup()%>%
#summarise(VarN1 = var(N1)/mean(N1)^2,
# VarFlow = var(Flow)/mean(Flow)^2,
# VarSC2 = var(SC2)/mean(SC2)^2,
# VarASC2 = var(AntiSC2)/mean(AntiSC2)^2)
#Graph paportion of data intercepters are
arranged plots with lines to communicate where flagged outliers are. Red Lines show Where there was a flagged on a day that also has collected interceptor data.
StartRange <- mdy("1-20-2021")
EndRange <- mdy("1-10-2022")
AllOuliersDates <- OutlierDF[["Date"]]
OutlierDatesIntercepters <- unique(inner_join(FullDFMassRatio.Inter,OutlierDF["Date"],by=c("Date"))$Date)
a <- OutLierPlotDF+
scale_x_date(limits = c(StartRange,EndRange))+
#geom_vline(xintercept = AllOuliersDates)+
geom_vline(xintercept = OutlierDatesIntercepters,color="Red")+
#geom_vline(xintercept = MissingIntercepter$Date,color="Red")+
theme(title = element_blank(),
axis.text.x = element_blank(),
axis.title.x = element_blank(),
legend.position="none")
b <- MassBalencePlot+
scale_x_date(limits = c(StartRange,EndRange))+
#geom_vline(xintercept = (AllOuliersDates))+
geom_vline(xintercept = (OutlierDatesIntercepters),color="Red")+
#geom_vline(xintercept = MissingIntercepter$Date,color="Red")+
theme(title = element_blank(),
axis.text.x = element_blank(),
axis.title.x = element_blank(),
legend.position="none")
c <- FlowPlot+
scale_x_date(limits = c(StartRange,EndRange))+
#geom_vline(xintercept = AllOuliersDates)+
geom_vline(xintercept = OutlierDatesIntercepters,color="Red")+
theme(title = element_blank(),
legend.position="none")
ggarrange(a,b,c,nrow=3,align ="v",legend.grob=get_legend(MassBalencePlot),legend="right")
#Cause of missing data
OutlierDatesIntercepters
## [1] "2021-04-15" "2021-04-26" "2021-07-26" "2021-12-13"
N_One_Outliers <- c(ymd("2021-04-15"), ymd("2021-04-26"), ymd("2021-07-26"), ymd("2021-12-13"))
N_TWO_Outliers <- c(ymd("2021-04-15"), ymd("2021-05-20"))
LowRatioN_One <- c("2021-04-15","2021-07-26","2021-12-13","2021-05-31","2021-03-22")
LowRatioN_TWO <- c("2021-04-15","2021-07-26","2021-12-13","2021-10-04","2021-03-22")
AllIntercepterComp_POI <- c(N_One_Outliers,N_TWO_Outliers,LowRatioN_One,LowRatioN_TWO)
FullDF%>%
filter(Date %in% AllIntercepterComp_POI)%>%
select(-FirstConfirmed,SevenDayMACases)
## Date Site SevenDayMACases N1 BCoV PMMoV GeoMeanN12
## 1 2021-03-22 MMSD-P11 3.600000 74718 7.233 13995222 74718
## 2 2021-03-22 MMSD-P18 3.600000 87689 9.071 5087805 87689
## 3 2021-03-22 MMSD-P2 6.000000 41663 6.174 12906803 41663
## 4 2021-03-22 MMSD-P7 7.400000 43453 0.741 13088946 43453
## 5 2021-03-22 MMSD-P8 8.600000 51290 5.706 19118442 51290
## 6 2021-04-15 MMSD-P11 11.285714 157424 3.626 45532010 157424
## 7 2021-04-15 MMSD-P18 10.142857 NA NA NA NA
## 8 2021-04-15 MMSD-P2 10.285714 48521 3.533 26162914 48521
## 9 2021-04-15 MMSD-P7 11.428571 NA NA NA NA
## 10 2021-04-15 MMSD-P8 11.857143 60518 5.001 20604026 60518
## 11 2021-04-26 MMSD-P11 4.857143 173731 8.414 32400873 173731
## 12 2021-04-26 MMSD-P18 5.333333 1761146 19.407 18185986 1761146
## 13 2021-04-26 MMSD-P2 5.833333 66599 17.244 169992062 66599
## 14 2021-04-26 MMSD-P7 7.666667 145908 12.930 13107287 145908
## 15 2021-04-26 MMSD-P8 8.166667 181856 11.324 20215977 181856
## 16 2021-05-20 MMSD-P11 2.714286 9997 2.871 66800100 9997
## 17 2021-05-20 MMSD-P18 2.857143 1945496 6.505 15415932 1945496
## 18 2021-05-20 MMSD-P2 3.571429 608010 9.994 44245555 608010
## 19 2021-05-20 MMSD-P7 3.571429 56388 2.936 32473853 56388
## 20 2021-05-20 MMSD-P8 3.000000 15247 5.676 18441997 15247
## 21 2021-05-31 MMSD-P11 1.428571 3972 3.353 31740972 3972
## 22 2021-05-31 MMSD-P18 1.333333 120744 4.302 31643578 120744
## 23 2021-07-26 MMSD-P11 4.428571 56082 1.730 34062331 56082
## 24 2021-07-26 MMSD-P18 3.714286 96259 4.820 11582896 96259
## 25 2021-07-26 MMSD-P2 5.428571 87920 7.879 42669683 87920
## 26 2021-07-26 MMSD-P7 6.285714 22234 8.842 21276643 22234
## 27 2021-07-26 MMSD-P8 7.571429 14820 9.955 24223424 14820
## 28 2021-10-04 MMSD-P11 21.857143 132758 3.220 30769148 132758
## 29 2021-10-04 MMSD-P18 25.142857 100502 0.800 9759807 100502
## 30 2021-10-04 MMSD-P2 26.285714 220932 3.560 13670096 220932
## 31 2021-10-04 MMSD-P7 29.285714 137332 3.210 14498503 137332
## 32 2021-10-04 MMSD-P8 27.857143 172914 1.680 20682700 172914
## 33 2021-12-13 MMSD-P11 19.000000 458847 16.130 86006923 458847
## 34 2021-12-13 MMSD-P18 21.000000 722770 12.720 13380021 722770
## 35 2021-12-13 MMSD-P2 34.000000 429555 2.325 33305288 429555
## 36 2021-12-13 MMSD-P7 49.285714 300888 2.231 31784189 300888
## 37 2021-12-13 MMSD-P8 56.285714 443364 8.883 18625898 443364
## 38 2021-03-22 Madison 35.000000 162650 1.035 16649039 162650
## 39 2021-04-15 Madison 50.285714 571269 3.684 23628196 571269
## 40 2021-04-26 Madison 39.333333 675748 16.079 28619982 675748
## 41 2021-05-20 Madison 19.666667 224021 14.337 49311472 224021
## 42 2021-05-31 Madison 6.000000 147362 3.645 37833422 147362
## 43 2021-07-26 Madison 43.333333 186460 3.589 30230924 186460
## 44 2021-10-04 Madison 90.000000 244225 2.350 12107928 244225
## 45 2021-12-13 Madison 229.333333 1498471 13.886 24525524 1498471
## 46 2021-05-31 MMSD-P2 NA 13114 4.599 29170047 13114
## 47 2021-05-31 MMSD-P7 NA 8117 1.276 30386513 8117
## 48 2021-05-31 MMSD-P8 NA 58037 5.907 19836360 58037
## FlowRate temperature equiv_sewage_amt
## 1 9.02 NA 1.000
## 2 11.52 NA 1.000
## 3 5.81 NA 1.000
## 4 3.67 NA 1.000
## 5 6.18 NA 1.000
## 6 8.91 NA 1.000
## 7 NA NA NA
## 8 6.33 NA 1.000
## 9 NA NA NA
## 10 6.78 NA 1.000
## 11 8.62 NA 0.250
## 12 10.79 NA 0.250
## 13 5.84 NA 0.250
## 14 4.41 NA 0.250
## 15 6.38 NA 0.250
## 16 8.61 NA 0.625
## 17 11.32 NA 0.625
## 18 5.52 NA 0.625
## 19 3.65 NA 0.625
## 20 6.88 NA 0.625
## 21 7.88 NA 0.625
## 22 9.98 NA 0.625
## 23 8.44 NA 1.000
## 24 10.28 NA 1.000
## 25 5.74 NA 1.000
## 26 4.52 NA 1.000
## 27 5.95 NA 1.000
## 28 8.47 NA 0.625
## 29 10.08 NA 0.625
## 30 6.74 NA 0.625
## 31 4.35 NA 0.625
## 32 6.27 NA 0.625
## 33 8.16 NA 0.625
## 34 10.74 NA 0.625
## 35 6.14 NA 0.625
## 36 4.65 NA 0.625
## 37 6.09 NA 0.625
## 38 36.20 NA 1.000
## 39 39.02 NA 1.000
## 40 36.04 NA 0.250
## 41 35.98 NA 0.625
## 42 32.02 NA 0.625
## 43 34.93 NA 1.000
## 44 35.91 NA 0.625
## 45 35.78 NA 0.625
## 46 4.74 NA 0.625
## 47 3.91 NA 0.625
## 48 5.51 NA 0.625